<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>

    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta content="Cask Data, Inc." name="author" />
<meta content="Copyright © 2018 Cask Data, Inc." name="copyright" />


    <meta name="git_release" content="6.1.1">
    <meta name="git_hash" content="05fbac36f9f7aadeb44f5728cea35136dbc243e5">
    <meta name="git_timestamp" content="2020-02-09 08:22:47 +0800">
    <title>Example Walk-Through</title>

    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap-theme.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/bootstrap-sphinx.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-dynamicscrollspy-4.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/abixTreeList-2.css" type="text/css" />
    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-hide-toc.css" type="text/css" />

    <!-- Defines the global DOCUMENTATION_OPTIONS object inline, before the
         external scripts that follow (jquery.js, underscore.js, doctools.js,
         language_data.js) are loaded.
         NOTE(review): URL_ROOT is an empty string even though this page
         references all of its assets through "../"; presumably a consumer of
         this object resolves paths against URL_ROOT, so confirm the empty
         value is intended for pages in a subdirectory. -->
    <script type="text/javascript">
      var DOCUMENTATION_OPTIONS = {
        URL_ROOT:    '',
        VERSION:     '6.1.1',
        COLLAPSE_INDEX: false,
        FILE_SUFFIX: '.html',
        HAS_SOURCE:  false
      };
    </script>
    <script type="text/javascript" src="../_static/jquery.js"></script>
    <script type="text/javascript" src="../_static/underscore.js"></script>
    <script type="text/javascript" src="../_static/doctools.js"></script>
    <script type="text/javascript" src="../_static/language_data.js"></script>

    <link rel="shortcut icon" href="../_static/favicon.ico"/>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="top" title="Cask Data Application Platform 6.1.1 Documentation" href="../index.html" />
    <link rel="up" title="Analytics" href="index.html" />
    <link rel="prev" title="Modeling" href="modeling.html" />
    <!-- block extrahead -->
    <!-- The character encoding is already declared by the Content-Type meta
         element at the top of <head>. HTML allows only one encoding
         declaration per document, so it is not repeated here. -->
    <!-- Ask legacy Internet Explorer to use its most recent rendering mode. -->
    <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
    <!-- No maximum-scale: capping the zoom level stops users from enlarging
         the page on touch devices. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <!-- block extrahead end -->

</head>
<body role="document">

<!-- block navbar -->
<div id="navbar" class="navbar navbar-inverse navbar-default navbar-fixed-top">
    <div class="container-fluid">
      <div class="row">
        <div class="navbar-header">
          <!-- Brand logo; links back to the documentation home page. -->
          <a class="navbar-brand" href="../table-of-contents/../../index.html">
            <span><img alt="CDAP logo" src="../_static/cdap_logo.svg"/></span>
          </a>

          <!-- .navbar-toggle expands/collapses the element matched by
               data-target (.nav-collapse) on narrow screens. The button shows
               only icon bars, so aria-label supplies its accessible name. -->
          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".nav-collapse"
            aria-label="Toggle navigation" aria-expanded="false">
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
          </button>

          <!-- Version switcher. Absolute https URLs are used so the links
               also resolve when this page is opened from the local file system. -->
          <div class="pull-right">
            <div class="dropdown version-dropdown">
              <a href="#" class="dropdown-toggle" data-toggle="dropdown"
                role="button" aria-haspopup="true" aria-expanded="false">
                v 6.1.1 <span class="caret"></span>
              </a>
              <ul class="dropdown-menu">
                <li><a href="https://docs.cdap.io/cdap/5.1.2/en/index.html">v 5.1.2</a></li>
                <li><a href="https://docs.cdap.io/cdap/4.3.4/en/index.html">v 4.3.4</a></li>
              </ul>
            </div>
          </div>
          <!-- Site search. The placeholder is not a label, so aria-label gives
               the text field an accessible name. -->
          <form class="navbar-form navbar-right navbar-search" action="../search.html" method="get">
            <div class="form-group">
              <div class="navbar-search-image material-icons"></div>
              <input type="text" name="q" class="form-control" placeholder="  Search" aria-label="Search" />
            </div>
            <input type="hidden" name="check_keywords" value="yes" />
            <input type="hidden" name="area" value="default" />
          </form>

          <!-- Top-level documentation tabs; this is the .nav-collapse target
               of the toggle button above. -->
          <div class="collapse navbar-collapse nav-collapse navbar-right navbar-navigation">
            <ul class="nav navbar-nav"><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../index.html">简介</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link current" href="../table-of-contents/../../guides.html">手册</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../reference-manual/index.html">参考</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../faqs/index.html">帮助</a></li>
            </ul>
          </div>

        </div>
      </div>
    </div>
  </div><!-- block navbar end -->
<!-- block main content -->
<div class="main-container container">
  <div class="row"><div class="col-md-2">
      <div id="sidebar" class="bs-sidenav scrollable-y-outside" role="complementary">
<!-- theme_manual: user-guide -->
<!-- theme_manual_highlight: guides -->
<!-- sidebar_title_link: ../table-of-contents/../../guides.html -->

  <div role="note" aria-label="manuals links"><h3><a href="../table-of-contents/../../guides.html">Guides</a></h3>

    <ul class="this-page-menu">
      <li class="toctree-l1"><b><a href="../table-of-contents/../../user-guide/index.html" rel="nofollow">用户手册</a></b>
      <nav class="pagenav">
      <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../index.html"> 简介</a></li>
<li class="toctree-l1"><a class="reference internal" href="../overview.html"> 概述</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/index.html"> 入门指南</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/campaign.html">MySQL 客户数据</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/nytimes-xml.html">纽约时报 XML 数据推送</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/stocks.html">股票选择</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/fitbit.html">物联网 IoT 设备数据</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../data-preparation/index.html"> 数据预处理</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/concepts.html">      概念</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/directives/index.html">      数据处理指令</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/catalog-lookup.html">catalog-lookup</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/change-column-case.html">change-column-case</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/changing-case.html">changing-case</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/cleanse-column-names.html">cleanse-column-names</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/columns-replace.html">columns-replace</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/copy.html">copy</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/cut-character.html">cut-character</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/decode.html">decode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/diff-date.html">diff-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/drop.html">drop</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/encode.html">encode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/extract-regex-groups.html">extract-regex-groups</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/fail.html">fail</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/fill-null-or-empty.html">fill-null-or-empty</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-row-if-matched.html">filter-row-if-matched</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-row-if-true.html">filter-row-if-true</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-rows-on.html">filter-rows-on</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/find-and-replace.html">find-and-replace</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/flatten.html">flatten</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/format-date.html">format-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/format-unix-timestamp.html">format-unix-timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/generate-uuid.html">generate-uuid</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/hash.html">hash</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/increment-variable.html">increment-variable</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/index-split.html">index-split</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/invoke-http.html">invoke-http</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/json-path.html">json-path</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/keep.html">keep</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/mask-number.html">mask-number</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/mask-shuffle.html">mask-shuffle</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/merge.html">merge</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-avro-file.html">parse-as-avro-file</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-avro.html">parse-as-avro</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-csv.html">parse-as-csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-date.html">parse-as-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-excel.html">parse-as-excel</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-fixed-length.html">parse-as-fixed-length</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-hl7.html">parse-as-hl7</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-json.html">parse-as-json</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-log.html">parse-as-log</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-simple-date.html">parse-as-simple-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-xml.html">parse-as-xml</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-timestamp.html">parse-timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-xml-to-json.html">parse-xml-to-json</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/quantize.html">quantize</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/rename.html">rename</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/send-to-error.html">send-to-error</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-charset.html">set-charset</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-column.html">set-column</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-columns.html">set-columns</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-record-delim.html">set-record-delim</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-type.html">set-type</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-variable.html">set-variable</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-by-separator.html">split-by-separator</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-email.html">split-email</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-to-columns.html">split-to-columns</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-to-rows.html">split-to-rows</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-url.html">split-url</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/stemming.html">stemming</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/swap.html">swap</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/table-lookup.html">table-lookup</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/text-distance.html">text-distance</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/text-metric.html">text-metric</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/trim.html">trim</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/url-decode.html">url-decode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/url-encode.html">url-encode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-csv.html">write-as-csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-json-map.html">write-as-json-map</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-json-object.html">write-as-json-object</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/xpath.html">xpath</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/functions/index.html">      函数</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/json-functions.html">JSON 函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/type-functions.html">类型函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/geofence-functions.html">地理围栏函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/dq-functions.html">数据质量函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/date-functions.html">日期函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/ddl-functions.html">DDL 函数</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/service/index.html">      服务</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/admin.html">行政和管理服务</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/connection-properties.html">连接属性</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/connections.html">连接服务</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/execution.html">数据处理指令执行</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/request.html">请求格式规范</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/schema-registry.html">Schema 注册库</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/services.html">数据预处理服务</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/performance.html">性能</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/exclusion-and-aliasing.html">排除与别名</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../pipelines/index.html"> 数据流管道</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/concepts-design.html"> 概念与设计</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/getting-started.html"> 入门指南</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/studio.html"> CDAP 数据流设计器</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/creating-pipelines.html"> 创建数据流管道</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/running-pipelines.html"> 运行数据流管道</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/plugin-management.html"> 插件管理</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/plugins/index.html"> 插件参考</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/actions/index.html"> Action Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/sources/index.html"> Source Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/transforms/index.html"> Transform Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/analytics/index.html"> Analytic Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/sinks/index.html"> Sink Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/shared-plugins/index.html"> Shared Plugins</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../pipelines/plugins/shared-plugins/core.html">CoreValidator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/post-run-plugins/index.html"> Post-run Plugins</a><ul class="simple">
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1 current"><a class="reference internal" href="index.html"> 数据分析</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="concepts.html"> Concepts</a></li>
<li class="toctree-l2"><a class="reference internal" href="feature-gen.html"> Feature Generation</a></li>
<li class="toctree-l2"><a class="reference internal" href="modeling.html"> Modeling</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#"> Example</a></li>
</ul>
</li>
</ul>
</nav>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../developer-manual/index.html" rel="nofollow">开发手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../admin-manual/index.html" rel="nofollow">管理手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../integrations/index.html" rel="nofollow">集成手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../examples-manual/index.html" rel="nofollow">最佳实践</a>
      </li>
    </ul>
  </div></div>
    </div><div class="col-md-8 content" id="main-content">
    
  <div class="section" id="example-walk-through">
<span id="user-guide-mmds-example"></span><h1>Example Walk-Through<a class="headerlink" href="#example-walk-through" title="Permalink to this headline">🔗</a></h1>
<p>This example walks you through using CDAP Sandbox to train a model that predicts the home sale prices for unsold homes on the market.</p>
<p>The data source for this example is 2017 real estate sales for a few cities in the San Francisco Bay Area,
which you can download <a class="reference external" href="https://hub.cdap.io/v2/packages/datapack-realestate-sales/1.0.0/sales.tsv">here</a>.
Place the file on your local machine.</p>
<div class="section" id="creating-an-experiment">
<h2>Creating an Experiment<a class="headerlink" href="#creating-an-experiment" title="Permalink to this headline">🔗</a></h2>
<p>The first step is to navigate to the <em>Analytics</em> tab at the top of CDAP.</p>
<p>After that, you will be asked to create a new experiment.
The first step in creating an experiment is selecting your data source.
Navigate to and select the sales data that you downloaded for this example. This takes you into the 数据预处理 view.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-start.png"><img alt="../_images/mmds-example-start.png" class="bordered-image" src="../_images/mmds-example-start.png" style="width: 800px;" /></a>
</div>
<p>The 数据预处理 view contains a sample of the data from your input file.
Start by parsing the body of our data as CSV with tab as a delimiter and the first row as a header.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-parse.png"><img alt="../_images/mmds-example-parse.png" class="bordered-image" src="../_images/mmds-example-parse.png" style="width: 800px;" /></a>
</div>
<p>Notice the immediate feedback about data changes. Delete the original body column, which is not needed.</p>
<p>Next, set the data types. Change the <cite>price</cite>, <cite>sqft</cite>, and <cite>lot_sqft</cite> columns to be of type <em>double</em>,
and the <cite>year_built</cite> column to be of type <em>int</em>.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-change-type.png"><img alt="../_images/mmds-example-change-type.png" class="bordered-image" src="../_images/mmds-example-change-type.png" style="width: 800px;" /></a>
</div>
<p>After setting the data types, set the outcome of our new experiment to be <cite>price</cite>.
Then, finish creating the experiment by giving a name and description.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-create-experiment.png"><img alt="../_images/mmds-example-create-experiment.png" class="bordered-image" src="../_images/mmds-example-create-experiment.png" style="width: 800px;" /></a>
</div>
</div>
<div class="section" id="creating-a-model">
<h2>Creating a model<a class="headerlink" href="#creating-a-model" title="Permalink to this headline">🔗</a></h2>
<p>Next, create a model in the experiment by providing a name and a description for the model.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-model-create.png"><img alt="../_images/mmds-example-model-create.png" class="bordered-image" src="../_images/mmds-example-model-create.png" style="width: 800px;" /></a>
</div>
<p>On the next page, click the button to split the data source into training and test datasets.
The split is generated after several seconds with some computed statistics for examination.</p>
<p>At this point, you can examine information about each feature, as well as the outcome.
Notice that most of the prices are concentrated in a relatively small bucket with a small number of outliers that are way outside the normal range.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-price-split.png"><img alt="../_images/mmds-example-price-split.png" class="bordered-image" src="../_images/mmds-example-price-split.png" style="width: 800px;" /></a>
</div>
</div>
<div class="section" id="cleaning-up-the-data">
<h2>Cleaning up the data<a class="headerlink" href="#cleaning-up-the-data" title="Permalink to this headline">🔗</a></h2>
<p>It is a good idea to filter out the extreme outliers.
Click the <em>Edit</em> link near the top of the screen, which takes you to the 数据预处理 view.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-filter-price.png"><img alt="../_images/mmds-example-filter-price.png" class="bordered-image" src="../_images/mmds-example-filter-price.png" style="width: 800px;" /></a>
</div>
<p>Add a filter on <cite>price</cite> to keep only the rows that have a price between one and ten million dollars, and then split the data again.
Notice the price distribution is now more sensible.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-split-good.png"><img alt="../_images/mmds-example-split-good.png" class="bordered-image" src="../_images/mmds-example-split-good.png" style="width: 800px;" /></a>
</div>
</div>
<div class="section" id="training-a-model">
<h2>Training a model<a class="headerlink" href="#training-a-model" title="Permalink to this headline">🔗</a></h2>
<p>At this point, we could go through our features and perform more cleanup.
For the sake of brevity, we move on to train a model.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-dtree-train.png"><img alt="../_images/mmds-example-dtree-train.png" class="bordered-image" src="../_images/mmds-example-dtree-train.png" style="width: 800px;" /></a>
</div>
<p>To start training the model, select the <em>Decision Tree Regression</em> algorithm and use the default parameters,
which should complete training in less than a minute and take you to the Experiment Detail view.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-model-list.png"><img alt="../_images/mmds-example-model-list.png" class="bordered-image" src="../_images/mmds-example-model-list.png" style="width: 800px;" /></a>
</div>
<p>The Experiment Detail view shows general information about the experiment and lists all the models in the experiment.
You can see the evaluation metrics and explore the predictions the model made during the evaluation process.
For example, you can run a SQL query to see which cities have the worst predictions.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-prediction-explore.png"><img alt="../_images/mmds-example-prediction-explore.png" class="bordered-image" src="../_images/mmds-example-prediction-explore.png" style="width: 800px;" /></a>
</div>
<p>At this point, you would typically explore the predictions or try training other models with different modeling algorithms
and parameters to try to generate a model with the least amount of error.</p>
<p>After you have created a model that you are happy with, you can create a scoring pipeline with the model in Pipeline Studio.</p>
</div>
<div class="section" id="making-predicitons">
<span id="user-guide-mmds-example-scoring-pipeline"></span><h2>Making Predictions<a class="headerlink" href="#making-predicitons" title="Permalink to this headline">🔗</a></h2>
<p>You will be making predictions on sample real estate listings.
Download the <a class="reference external" href="https://hub.cdap.io/v2/packages/datapack-realestate-listings/1.0.0/listings.tsv">listing file</a> and place it on your local machine.</p>
<p>Now click the <cite>Creating a scoring pipeline</cite> button to use the model we just trained to create a scoring pipeline.
This brings you to the Pipeline Studio with part of a pipeline preconfigured for you.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-scoring-pipeline-start.png"><img alt="../_images/mmds-example-scoring-pipeline-start.png" class="bordered-image" src="../_images/mmds-example-scoring-pipeline-start.png" style="width: 800px;" /></a>
</div>
<p>For illustrative purposes, the pipeline uses the training data as a source.
The first step is to change the File source so that it reads from the listings data instead of the sales data.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-scoring-pipeline-input.png"><img alt="../_images/mmds-example-scoring-pipeline-input.png" class="bordered-image" src="../_images/mmds-example-scoring-pipeline-input.png" style="width: 800px;" /></a>
</div>
<p>The listings data is similar to our sales data except that it does not have a field for the price.
Price is what this pipeline will be predicting. Since there is no price field,
you need to update the Wrangler plugin to remove directives that operate on price and remove price from the output schema.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-scoring-pipeline-wrangler.png"><img alt="../_images/mmds-example-scoring-pipeline-wrangler.png" class="bordered-image" src="../_images/mmds-example-scoring-pipeline-wrangler.png" style="width: 800px;" /></a>
</div>
<p>Next, configure the MLPredictor plugin to add a field called <cite>predicted_price</cite>.
Because the model is a regression model, the prediction field must be of type <cite>double</cite>.
When using a classification model, the prediction field would need to be of type string.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-scoring-pipeline-predictor.png"><img alt="../_images/mmds-example-scoring-pipeline-predictor.png" class="bordered-image" src="../_images/mmds-example-scoring-pipeline-predictor.png" style="width: 800px;" /></a>
</div>
<p>Lastly, you need to add a sink to write the predictions to.
On the left panel, select <cite>Parquet Time Partitioned Dataset</cite> and connect the predictor to the sink.
The sink needs to be configured with just one field: the dataset name. Enter <cite>listings_price_predictions</cite> as the dataset name.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-scoring-pipeline-add-sink.png"><img alt="Parquet Time Partitioned Dataset sink added to the pipeline and connected to the predictor" class="bordered-image" src="../_images/mmds-example-scoring-pipeline-add-sink.png" style="width: 800px;" /></a>
</div>
<p>To deploy the pipeline, click the <cite>Deploy</cite> button near the top right of the screen.
This brings up the pipeline detail view, where you can manually run the scoring pipeline to test that it works.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-scoring-pipeline-run.png"><img alt="Pipeline detail view for running the deployed scoring pipeline" class="bordered-image" src="../_images/mmds-example-scoring-pipeline-run.png" style="width: 800px;" /></a>
</div>
<p>After the pipeline successfully runs, click the sink. At the top right of the pop-up window, click the button to see the dataset detail page.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-listings-predictions.png"><img alt="Dataset detail page for the listings price predictions" class="bordered-image" src="../_images/mmds-example-listings-predictions.png" style="width: 800px;" /></a>
</div>
<p>Near the top right of this page, click the eye icon to open the SQL view and explore the dataset.
From here, you can query the dataset to explore the predictions that were just made.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-example-listings-predictions-explore.png"><img alt="SQL view for exploring the listings price predictions dataset" class="bordered-image" src="../_images/mmds-example-listings-predictions-explore.png" style="width: 800px;" /></a>
</div>
<p>With minor changes, you can schedule a pipeline like this to run on a periodic interval, reading from a different directory each time.
In this way, you can use the model you just trained to make price predictions for new listings as they become available.</p>
</div>
</div>

</div>
    <div class="col-md-2">
      <div id="right-sidebar" class="bs-sidenav scrollable-y" role="complementary">
        <div id="localtoc-scrollspy">
        </div>
      </div>
    </div></div>
</div>
<!-- block main content end -->
<!-- block footer -->
<footer class="footer">
      <div class="container">
        <div class="row">
          <div class="col-md-2 footer-left"><a title="Modeling" href="modeling.html">Previous</a></div>
          <div class="col-md-8 footer-center"><a class="footer-tab-link" href="../table-of-contents/../../reference-manual/licenses/index.html">Copyright</a> &copy; 2014-2020 Cask Data, Inc.&bull; <a class="footer-tab-link" href="//docs.cask.co/cdap/6.1.1/cdap-docs-6.1.1-web.zip" rel="nofollow">Download</a> an archive or
<a class="footer-tab-link" href="//docs.cask.co/cdap">switch the version</a> of the documentation
          </div>
          <div class="col-md-2 footer-right">&mdash;</div>
        </div>
      </div>
    </footer>
<!-- block footer end -->
<script type="text/javascript" src="../_static/bootstrap-3.3.6/js/bootstrap.min.js"></script><script type="text/javascript" src="../_static/js/bootstrap-sphinx.js"></script><script type="text/javascript" src="../_static/js/abixTreeList-2.js"></script><script type="text/javascript" src="../_static/js/cdap-dynamicscrollspy-4.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script><script type="text/javascript" src="../_static/js/copy-to-clipboard.js"></script><script type="text/javascript" src="../_static/js/jquery.mousewheel.min.js"></script><script type="text/javascript" src="../_static/js/jquery.mCustomScrollbar.js"></script><script type="text/javascript" src="../_static/js/js.cookie.js"></script><script type="text/javascript" src="../_static/js/tabbed-parsed-literal-0.2.js"></script><script type="text/javascript" src="../_static/js/cdap-onload-javascript.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script>
    <script src="https://cdap.gitee.io/docs/cdap/json-versions.js"></script>
  </body>
</html>